Use 'net-ftp-list' gem to parse ftp directory listing.

It also parses directory listings from non-Unix FTP servers, which use a different list format.

Guilherme J. Tramontina 9 anni fa
parent
commit
a2243590f6
4 ha cambiato i file con 50 aggiunte e 33 eliminazioni
  1. 1 0
      Gemfile
  2. 2 0
      Gemfile.lock
  3. 6 24
      app/models/agents/ftpsite_agent.rb
  4. 41 9
      spec/models/agents/ftpsite_agent_spec.rb

+ 1 - 0
Gemfile

@@ -59,6 +59,7 @@ gem 'faraday', '~> 0.9.0'
59 59
 gem 'faraday_middleware'
60 60
 gem 'typhoeus', '~> 0.6.3'
61 61
 gem 'nokogiri', '~> 1.6.1'
62
+gem 'net-ftp-list', '~> 3.2.8'
62 63
 
63 64
 gem 'wunderground', '~> 1.2.0'
64 65
 gem 'forecast_io', '~> 2.0.0'

+ 2 - 0
Gemfile.lock

@@ -188,6 +188,7 @@ GEM
188 188
     multipart-post (2.0.0)
189 189
     mysql2 (0.3.16)
190 190
     naught (1.0.0)
191
+    net-ftp-list (3.2.8)
191 192
     nokogiri (1.6.2.1)
192 193
       mini_portile (= 0.6.0)
193 194
     oauth2 (0.9.4)
@@ -391,6 +392,7 @@ DEPENDENCIES
391 392
   liquid (~> 2.6.1)
392 393
   mqtt
393 394
   mysql2 (~> 0.3.16)
395
+  net-ftp-list (~> 3.2.8)
394 396
   nokogiri (~> 1.6.1)
395 397
   pg
396 398
   protected_attributes (~> 1.0.8)

+ 6 - 24
app/models/agents/ftpsite_agent.rb

@@ -1,4 +1,5 @@
1 1
 require 'net/ftp'
2
+require 'net/ftp/list'
2 3
 require 'uri'
3 4
 require 'time'
4 5
 
@@ -105,34 +106,15 @@ module Agents
105 106
         # commands during iteration.
106 107
         list = ftp.list('-a')
107 108
 
108
-        month2year = {}
109
-
110 109
         list.each do |line|
111
-          mon, day, smtn, rest = line.split(' ', 9)[5..-1]
112
-
113
-          # Remove symlink target part if any
114
-          filename = rest[/\A(.+?)(?:\s+->\s|\z)/, 1]
115
-
110
+          entry = Net::FTP::List.parse line
111
+          filename = entry.basename
112
+          mtime = Time.parse(entry.mtime.to_s).utc
113
+          
116 114
           patterns.any? { |pattern|
117 115
             File.fnmatch?(pattern, filename)
118 116
           } or next
119 117
 
120
-          case smtn
121
-          when /:/
122
-            if year = month2year[mon]
123
-              mtime = Time.parse("#{mon} #{day} #{year} #{smtn} GMT")
124
-            else
125
-              log "Getting mtime of #{filename}"
126
-              mtime = ftp.mtime(filename)
127
-              month2year[mon] = mtime.year
128
-            end
129
-          else
130
-            # Do not bother calling MDTM for old files.  Losing the
131
-            # time part only makes a timestamp go backwards, meaning
132
-            # that it will trigger no new event.
133
-            mtime = Time.parse("#{mon} #{day} #{smtn} GMT")
134
-          end
135
-
136 118
           after < mtime or next
137 119
 
138 120
           yield filename, mtime
@@ -193,7 +175,7 @@ module Agents
193 175
         found_entries[filename]
194 176
       }.each { |filename|
195 177
         create_event :payload => {
196
-          'url' => (base_uri + filename).to_s,
178
+          'url' => "#{base_uri}#{filename}",
197 179
           'filename' => filename,
198 180
           'timestamp' => found_entries[filename],
199 181
         }

+ 41 - 9
spec/models/agents/ftpsite_agent_spec.rb

@@ -7,19 +7,23 @@ describe Agents::FtpsiteAgent do
7 7
       @site = {
8 8
         'expected_update_period_in_days' => 1,
9 9
         'url' => "ftp://ftp.example.org/pub/releases/",
10
-        'patterns' => ["example-*.tar.gz"],
10
+        'patterns' => ["example*.tar.gz"],
11 11
       }
12 12
       @checker = Agents::FtpsiteAgent.new(:name => "Example", :options => @site, :keep_events_for => 2)
13 13
       @checker.user = users(:bob)
14 14
       @checker.save!
15
-      stub(@checker).each_entry.returns { |block|
16
-        block.call("example-latest.tar.gz", Time.parse("2014-04-01T10:00:01Z"))
17
-        block.call("example-1.0.tar.gz",    Time.parse("2013-10-01T10:00:00Z"))
18
-        block.call("example-1.1.tar.gz",    Time.parse("2014-04-01T10:00:00Z"))
19
-      }
20 15
     end
21 16
 
22 17
     describe "#check" do
18
+
19
+      before do
20
+        stub(@checker).each_entry.returns { |block|
21
+          block.call("example latest.tar.gz", Time.parse("2014-04-01T10:00:01Z"))
22
+          block.call("example-1.0.tar.gz",    Time.parse("2013-10-01T10:00:00Z"))
23
+          block.call("example-1.1.tar.gz",    Time.parse("2014-04-01T10:00:00Z"))
24
+        }
25
+      end
26
+
23 27
       it "should validate the integer fields" do
24 28
         @checker.options['expected_update_period_in_days'] = "nonsense"
25 29
         lambda { @checker.save! }.should raise_error;
@@ -33,7 +37,7 @@ describe Agents::FtpsiteAgent do
33 37
           known_entries.sort_by(&:last).should == [
34 38
             ["example-1.0.tar.gz",    "2013-10-01T10:00:00Z"],
35 39
             ["example-1.1.tar.gz",    "2014-04-01T10:00:00Z"],
36
-            ["example-latest.tar.gz", "2014-04-01T10:00:01Z"],
40
+            ["example latest.tar.gz", "2014-04-01T10:00:01Z"],
37 41
           ]
38 42
         }
39 43
 
@@ -46,7 +50,7 @@ describe Agents::FtpsiteAgent do
46 50
         lambda { @checker.check }.should_not change { Event.count }
47 51
 
48 52
         stub(@checker).each_entry.returns { |block|
49
-          block.call("example-latest.tar.gz", Time.parse("2014-04-02T10:00:01Z"))
53
+          block.call("example latest.tar.gz", Time.parse("2014-04-02T10:00:01Z"))
50 54
 
51 55
           # In the long list format the timestamp may look going
52 56
           # backwards after six months: Oct 01 10:00 -> Oct 01 2013
@@ -62,7 +66,7 @@ describe Agents::FtpsiteAgent do
62 66
             ["example-1.0.tar.gz",    "2013-10-01T00:00:00Z"],
63 67
             ["example-1.1.tar.gz",    "2014-04-01T10:00:00Z"],
64 68
             ["example-1.2.tar.gz",    "2014-04-02T10:00:00Z"],
65
-            ["example-latest.tar.gz", "2014-04-02T10:00:01Z"],
69
+            ["example latest.tar.gz", "2014-04-02T10:00:01Z"],
66 70
           ]
67 71
         }
68 72
 
@@ -75,5 +79,33 @@ describe Agents::FtpsiteAgent do
75 79
         lambda { @checker.check }.should_not change { Event.count }
76 80
       end
77 81
     end
82
+
83
+    describe "#each_entry" do
84
+      before do
85
+        stub.any_instance_of(Net::FTP).list.returns [ # Windows format
86
+          "04-02-14  10:01AM            288720748 example latest.tar.gz",
87
+          "04-01-14  10:05AM            288720710 no-match-example.tar.gz"
88
+        ]
89
+        stub(@checker).open_ftp.yields Net::FTP.new
90
+      end
91
+
92
+      it "filters out files that don't match the given format" do
93
+        entries = []
94
+        @checker.each_entry { |a, b| entries.push [a, b] }
95
+
96
+        entries.size.should == 1
97
+        filename, mtime = entries.first
98
+        filename.should == 'example latest.tar.gz'
99
+        mtime.should == '2014-04-02T10:01:00Z'
100
+      end
101
+
102
+      it "filters out files that are older than the given date" do
103
+        @checker.options['after'] = '2015-10-21'
104
+        entries = []
105
+        @checker.each_entry { |a, b| entries.push [a, b] }
106
+        entries.size.should == 0
107
+      end
108
+    end
109
+
78 110
   end
79 111
 end